## clear environment
## NOTE(review): rm(list = ls()) only removes the (non-hidden) objects in the
## environment this script is run in; attached packages and options() are left
## as they are, so this is not equivalent to starting a fresh R session.
rm(list = ls())

## load libraries
library(wikipediatrend)
library(tidyverse)

#' Percent-decode strings and mark the result as UTF-8
#'
#' Turns e.g. "T%C3%BAnez" into "Túnez". This is needed so that looping
#' through the page titles to get page views works later on.
#'
#' @param x Percent-encoded strings, passed straight to
#'   `urltools::url_decode()`.
#' @return The decoded strings, with their encoding declared as UTF-8.
url_decode_utf <- function(x) {
  decoded <- urltools::url_decode(x)
  # the decoded bytes are UTF-8; declare that so R interprets them correctly
  Encoding(decoded) <- "UTF-8"
  decoded
}

africa_dict <- data.frame(matrix(c("Adrar_Province", "Adrar", "Algeria", 
                                   "Chlef_Province", "Chlef", "Algeria", 
                                   "Laghouat_Province", "Laghouat", "Algeria", 
                                   "Oum_El_Bouaghi_Province", "Oum el Bouaghi", "Algeria",  
                                   "Batna_Province", "Batna", "Algeria", 
                                   "Béjaïa_Province", "Béjaïa", "Algeria", 
                                   "Biskra_Province", "Biskra", "Algeria", 
                                   "Béchar_Province", "Béchar", "Algeria", 
                                   "Blida_Province", "Blida", "Algeria", 
                                   "Bouïra_Province", "Bouira", "Algeria", 
                                   "Tamanrasset_Province", "Tamanghasset", "Algeria", 
                                   "Tébessa_Province", "Tébessa", "Algeria", 
                                   "Tlemcen_Province", "Tlemcen", "Algeria", 
                                   "Tiaret_Province", "Tiaret", "Algeria", 
                                   "Tizi_Ouzou_Province", "Tizi Ouzou", "Algeria", 
                                   "Algiers_Province", "Alger", "Algeria", 
                                   "Djelfa_Province", "Djelfa", "Algeria", 
                                   "Jijel_Province", "Jijel", "Algeria", 
                                   "Sétif_Province", "Sétif", "Algeria", 
                                   "Saïda_Province", "Saïda", "Algeria", 
                                   "Skikda_Province", "Skikda", "Algeria", 
                                   "Sidi_Bel_Abbès_Province", "Sidi Bel Abbès", "Algeria", 
                                   "Annaba_Province", "Annaba", "Algeria", 
                                   "Guelma_Province", "Guelma", "Algeria", 
                                   "Constantine_Province", "Constantine", "Algeria", 
                                   "Médéa_Province", "Médéa", "Algeria", 
                                   "Mostaganem_Province", "Mostaganem", "Algeria", 
                                   "M%27Sila_Province", "M'Sila", "Algeria", ## see if "M'Sila_Province" works too
                                   "Mascara_Province", "Mascara", "Algeria", 
                                   "Ouargla_Province", "Ouargla", "Algeria", 
                                   "Oran_Province", "Oran", "Algeria", 
                                   "El_Bayadh_Province", "El Bayadh", "Algeria", 
                                   "Illizi_Province", "Illizi", "Algeria", 
                                   "Bordj_Bou_Arréridj_Province", "Bordj Bou Arréridj", "Algeria", 
                                   "Boumerdès_Province", "Boumerdès", "Algeria", 
                                   "El_Taref_Province", "El Tarf", "Algeria", 
                                   "Tindouf_Province", "Tindouf", "Algeria", 
                                   "Tissemsilt_Province", "Tissemsilt", "Algeria", 
                                   "El_Oued_Province", "El Oued", "Algeria", 
                                   "Khenchela_Province", "Khenchela", "Algeria", 
                                   "Souk_Ahras_Province", "Souk Ahras", "Algeria", 
                                   "Tipaza_Province", "Tipaza", "Algeria", 
                                   "Mila_Province", "Mila", "Algeria", 
                                   "Aïn_Defla_Province", "Aïn Defla", "Algeria", 
                                   "Naâma_Province", "Naâma", "Algeria", 
                                   "Aïn_Témouchent_Province", "Aïn Témouchent", "Algeria", 
                                   "Ghardaïa_Province", "Ghardaïa", "Algeria", 
                                   "Relizane_Province", "Relizane",  "Algeria", 
                                   ####################
                                   ## angola "Angola", 
                                   ####################
                                   "Bengo_Province", "Bengo", "Angola", 
                                   "Benguela_Province", "Benguela", "Angola", 
                                   "Bié_Province", "Bié", "Angola", 
                                   "Cabinda_Province", "Cabinda", "Angola", 
                                   "Cuando_Cubango_Province", "Cuando Cubango", "Angola", 
                                   "Cuanza_Norte_Province", "Cuanza Norte", "Angola", 
                                   "Cuanza_Sul_Province", "Cuanza Sul", "Angola", 
                                   "Cunene_Province", "Cunene", "Angola", 
                                   "Huambo_Province", "Huambo", "Angola", 
                                   "Huíla_Province", "Huíla", "Angola", 
                                   "Luanda_Province", "Luanda", "Angola", 
                                   "Lunda_Norte_Province", "Lunda Norte", "Angola", 
                                   "Lunda_Sul_Province", "Lunda Sul", "Angola", 
                                   "Malanje_Province", "Malanje", "Angola", 
                                   "Moxico_Province", "Moxico", "Angola", 
                                   "Namibe_Province", "Namibe", "Angola", 
                                   "Uíge_Province", "Uíge", "Angola", 
                                   "Zaire_Province", "Zaire", "Angola", 
                                   ####################
                                   ## benin "Benin", 
                                   ####################
                                   "Alibori_Department", "Alibori", "Benin", 
                                   "Atakora_Department", "Atakora", "Benin", 
                                   "Atlantique_Department", "Atlantique", "Benin", 
                                   "Borgou_Department", "Borgou", "Benin", 
                                   "Collines_Department", "Collines", "Benin", 
                                   "Kouffo_Department", "Kouffo", "Benin", 
                                   "Donga_Department", "Donga", "Benin", 
                                   "Littoral_Department", "Littoral", "Benin", 
                                   "Mono_Department", "Mono", "Benin", 
                                   "Ouémé_Department", "Ouémé", "Benin", 
                                   "Plateau_Department", "Plateau", "Benin", 
                                   "Zou_Department", "Zou", "Benin", 
                                   ####################
                                   ## botswana "Botswana", 
                                   ####################
                                   "Gaborone", "Gaborone", "Botswana", 
                                   "Francistown", "Francistown", "Botswana", 
                                   "Lobatse", "Lobatse", "Botswana", 
                                   "Selebi-Phikwe", "Selibe Phikwe", "Botswana", 
                                   "Jwaneng", "Jwaneng", "Botswana", 
                                   # "Orapa", "North-West", # missing in gadm, either leave out or attach to north-west 
                                   "Sowa,_Botswana", "Sowa", "Botswana", 
                                   "Southern_District_(Botswana)", "Southern", "Botswana", 
                                   "South-East_District_(Botswana)", "South-East", "Botswana", 
                                   "Kweneng_District", "Kweneng", "Botswana", 
                                   "Kgatleng_District", "Kgatleng", "Botswana", 
                                   "Central_District_(Botswana)", "Central", "Botswana", 
                                   "North-East_District_(Botswana)", "North-East", "Botswana", 
                                   "Ngamiland_District", "North-West", "Botswana", # has to be north-west according to map
                                   "Chobe_District", "Chobe", "Botswana", 
                                   "Ghanzi_District", "Ghanzi", "Botswana", 
                                   "Kgalagadi_District", "Kgalagadi", "Botswana", 
                                   ####################
                                   ## burkina faso "Burkina Faso", 
                                   ####################
                                   "Boucle_du_Mouhoun_Region", "Boucle du Mouhoun", "Burkina Faso", 
                                   "Cascades_Region", "Cascades", "Burkina Faso", 
                                   "Centre_Region,_Burkina_Faso", "Centre", "Burkina Faso", 
                                   "Centre-Est_Region", "Centre-Est", "Burkina Faso", 
                                   "Centre-Nord_Region", "Centre-Nord", "Burkina Faso", 
                                   "Centre-Ouest_Region", "Centre-Ouest", "Burkina Faso", 
                                   "Centre-Sud_Region", "Centre-Sud", "Burkina Faso", 
                                   "Est_Region_(Burkina_Faso)", "Est", "Burkina Faso", 
                                   "Hauts-Bassins_Region", "Haut-Bassins", "Burkina Faso", 
                                   "Nord_Region_(Burkina_Faso)", "Nord", "Burkina Faso", 
                                   "Plateau-Central_Region", "Plateau-Central", "Burkina Faso", 
                                   "Sahel_Region", "Sahel", "Burkina Faso", 
                                   "Sud-Ouest_Region_(Burkina_Faso)", "Sud-Ouest", "Burkina Faso", 
                                   ####################
                                   ## burundi "Burundi", 
                                   ####################
                                   "Cankuzo_Province", "Cankuzo", "Burundi", 
                                   "Gitega_Province", "Gitega", "Burundi", 
                                   "Rutana_Province", "Rutana", "Burundi", 
                                   "Ruyigi_Province","Ruyigi", "Burundi", 
                                   "Karuzi_Province", "Karuzi", "Burundi", 
                                   "Kayanza_Province", "Kayanza", "Burundi", 
                                   "Kirundo_Province", "Kirundo", "Burundi", 
                                   "Muyinga_Province", "Muyinga", "Burundi", 
                                   "Ngozi_Province", "Ngozi", "Burundi", 
                                   "Bururi_Province", "Bururi", "Burundi", 
                                   "Makamba_Province", "Makamba", "Burundi", 
                                   # "Rumonge_Province", ## only established in 2015 and not yet part of gadm
                                   "Bubanza_Province", "Bubanza", "Burundi", 
                                   "Bujumbura_Mairie_Province", "Bujumbura Mairie", "Burundi", 
                                   "Bujumbura_Rural_Province", "Bujumbura Rural", "Burundi", 
                                   "Cibitoke_Province", "Cibitoke", "Burundi", 
                                   "Muramvya_Province", "Muramvya", "Burundi", 
                                   "Mwaro_Province", "Mwaro", "Burundi", 
                                   ####################
                                   ## cameroon "Cameroon", 
                                   ####################
                                   "Adamawa_Region", "Adamaoua", "Cameroon", 
                                   "Centre_Region_(Cameroon)", "Centre", "Cameroon", 
                                   "East_Region_(Cameroon)", "Est", "Cameroon", 
                                   "Far_North_Region,_Cameroon", "Extrême-Nord", "Cameroon", 
                                   "Littoral_Region_(Cameroon)", "Littoral", "Cameroon", 
                                   "North_Region_(Cameroon)", "Nord", "Cameroon", 
                                   "Northwest_Region_(Cameroon)", "Nord-Ouest", "Cameroon", 
                                   "South_Region_(Cameroon)", "Sud", "Cameroon", 
                                   "Southwest_Region_(Cameroon)", "Sud-Ouest", "Cameroon", 
                                   "West_Region_(Cameroon)", "Ouest", "Cameroon", 
                                   ####################
                                   ## cape verde "Cabo Verde", 
                                   ####################
                                   "Tarrafal,_Cape_Verde_(municipality)", "Tarrafal", "Cabo Verde", 
                                   "São_Miguel,_Cape_Verde", "São Miguel", "Cabo Verde", 
                                   "São_Salvador_do_Mundo,_Cape_Verde", "São Salvador do Mundo", "Cabo Verde", 
                                   "Santa_Cruz,_Cape_Verde", "Santa Cruz", "Cabo Verde", 
                                   "São_Domingos,_Cape_Verde_(municipality)", "São Domingos", "Cabo Verde", 
                                   "Praia,_Cape_Verde_(municipality)", "Praia", "Cabo Verde", 
                                   "Ribeira_Grande_de_Santiago,_Cape_Verde", "Ribeira Grande de Santiago", "Cabo Verde", 
                                   "São_Lourenço_dos_Órgãos,_Cape_Verde", "São Lourenço dos Órgãos", "Cabo Verde", 
                                   "Santa_Catarina,_Cape_Verde", "Santa Catarina", "Cabo Verde", 
                                   "Brava,_Cape_Verde", "Brava", "Cabo Verde", 
                                   "São_Filipe,_Cape_Verde_(municipality)", "São Filipe", "Cabo Verde", 
                                   "Santa_Catarina_do_Fogo,_Cape_Verde", "Santa Catarina do Fogo", "Cabo Verde", 
                                   "Mosteiros,_Cape_Verde_(municipality)", "Mosteiros", "Cabo Verde", 
                                   "Maio,_Cape_Verde", "Maio", "Cabo Verde", 
                                   "Boa_Vista,_Cape_Verde", "Boa Vista", "Cabo Verde", 
                                   "Sal,_Cape_Verde", "Sal", "Cabo Verde", 
                                   "Ribeira_Brava,_Cape_Verde_(municipality)", "Ribeira Brava", "Cabo Verde", 
                                   "Tarrafal_de_São_Nicolau_(municipality)", "Tarrafal de São Nicolau", "Cabo Verde", 
                                   "São_Vicente,_Cape_Verde", "São Vicente", "Cabo Verde", 
                                   "Porto_Novo,_Cape_Verde_(municipality)", "Porto Novo", "Cabo Verde", 
                                   "Ribeira_Grande,_Cape_Verde_(municipality)", "Ribeira Grande", "Cabo Verde", 
                                   "Paul,_Cape_Verde", "Paúl", "Cabo Verde", 
                                   ####################
                                   ## central african republic "Central African Republic", 
                                   ####################
                                   "Bangui", "Bangui", "Central African Republic", 
                                   "Mbomou", "Mbomou", "Central African Republic", 
                                   "Basse-Kotto", "Basse-Kotto", "Central African Republic", 
                                   "Kémo", "Kémo", "Central African Republic", 
                                   "Nana-Mambéré", "Nana-Mambéré", "Central African Republic", 
                                   "Ouham", "Ouham", "Central African Republic", 
                                   "Sangha-Mbaéré", "Sangha-Mbaéré", "Central African Republic", 
                                   "Lobaye", "Lobaye", "Central African Republic", 
                                   "Ombella-M%27Poko", "Ombella-M'Poko", "Central African Republic", ## see if Ombella-M'Poko
                                   "Ouham-Pendé", "Ouham-Pendé", "Central African Republic", 
                                   "Haut-Mbomou", "Haut-Mbomou", "Central African Republic", 
                                   "Ouaka", "Ouaka", "Central African Republic", 
                                   "Haute-Kotto", "Haute-Kotto", "Central African Republic", 
                                   "Bamingui-Bangoran", "Bamingui-Bangoran", "Central African Republic", 
                                   "Vakaga", "Vakaga", "Central African Republic", 
                                   "Nana-Grébizi", "Nana-Grébizi", "Central African Republic", 
                                   "Mambéré-Kadéï", "Mambéré-Kadéï", "Central African Republic", 
                                   ####################
                                   ## chad "Chad", 
                                   ####################
                                   "Batha_(region)", "Batha", "Chad", 
                                   "Chari-Baguirmi_(region)", "Chari-Baguirmi", "Chad", 
                                   "Hadjer-Lamis_(region)", "Hadjer-Lamis", "Chad", 
                                   "Wadi_Fira_(region)", "Wadi Fira", "Chad", 
                                   "Bahr_el_Gazel_(region_of_Chad)", "Barh el Ghazel", "Chad", 
                                   "Borkou_(region_of_Chad)", "Borkou", "Chad", 
                                   "Ennedi-Est_(region)", "Ennedi Est", "Chad", 
                                   "Ennedi-Ouest_(region)", "Ennedi Ouest", "Chad", 
                                   "Guéra_(region)", "Guéra", "Chad", 
                                   "Kanem_(region)","Kanem", "Chad", 
                                   "Lac_(region)", "Lac", "Chad", 
                                   "Logone_Occidental_(region)", "Logone Occidental", "Chad", 
                                   "Logone_Oriental_(region)", "Logone Oriental", "Chad", 
                                   "Mandoul_Region", "Mandoul", "Chad", 
                                   "Mayo-Kebbi_Est", "Mayo-Kebbi Est", "Chad", 
                                   "Mayo-Kebbi_Ouest_Region", "Mayo-Kebbi Ouest", "Chad", 
                                   "Moyen-Chari_(region)", "Moyen-Chari", "Chad", 
                                   "Ouaddaï_Region", "Ouaddaï", "Chad", 
                                   "Salamat_(region)", "Salamat", "Chad", 
                                   "Sila_Region", "Sila", "Chad", 
                                   "Tandjilé_(region)", "Tandjilé", "Chad", 
                                   "Tibesti_Region", "Tibesti", "Chad", 
                                   "N%27Djamena", "Ville de N'Djamena", "Chad",  ## see if N'Djamena
                                   ####################
                                   ## comoros "Comoros", 
                                   ####################
                                   "Anjouan", "Nzwani", "Comoros", 
                                   "Grande_Comore", "Njazídja", "Comoros", 
                                   "Mohéli", "Mwali", "Comoros",
                                   ####################
                                   ## democratic republic congo "Democratic Republic of the Congo", 
                                   ####################
                                   "Kinshasa", "Kinshasa", "Democratic Republic of the Congo", 
                                   "Kongo_Central", "Kongo-Central", "Democratic Republic of the Congo", 
                                   "Kwango", "Kwango", "Democratic Republic of the Congo", 
                                   "Kwilu_Province", "Kwilu", "Democratic Republic of the Congo", 
                                   "Mai-Ndombe_Province", "Mai-Ndombe", "Democratic Republic of the Congo", 
                                   "Kasaï_Province", "Kasaï", "Democratic Republic of the Congo", 
                                   "Kasaï-Central", "Kasaï-Central", "Democratic Republic of the Congo", 
                                   "Kasaï-Oriental", "Kasaï-Oriental", "Democratic Republic of the Congo", 
                                   "Lomami_Province", "Lomami", "Democratic Republic of the Congo", 
                                   "Sankuru", "Sankuru", "Democratic Republic of the Congo", 
                                   "Maniema", "Maniema", "Democratic Republic of the Congo", 
                                   "South_Kivu", "Sud-Kivu", "Democratic Republic of the Congo", 
                                   "North_Kivu", "Nord-Kivu", "Democratic Republic of the Congo", 
                                   "Ituri_Province", "Ituri", "Democratic Republic of the Congo", 
                                   "Haut-Uele", "Haut-Uele", "Democratic Republic of the Congo", 
                                   "Tshopo", "Tshopo", "Democratic Republic of the Congo", 
                                   "Bas-Uele", "Bas-Uele", "Democratic Republic of the Congo", 
                                   "Nord-Ubangi", "Nord-Ubangi", "Democratic Republic of the Congo", 
                                   "Mongala", "Mongala", "Democratic Republic of the Congo", 
                                   "Sud-Ubangi", "Sud-Ubangi", "Democratic Republic of the Congo", 
                                   "Province_of_Équateur", "Équateur", "Democratic Republic of the Congo", 
                                   "Tshuapa", "Tshuapa", "Democratic Republic of the Congo", 
                                   "Tanganyika_Province", "Tanganyika", "Democratic Republic of the Congo", 
                                   "Haut-Lomami", "Haut-Lomami", "Democratic Republic of the Congo", 
                                   "Lualaba_Province", "Lualaba", "Democratic Republic of the Congo", 
                                   "Haut-Katanga_Province", "Haut-Katanga", "Democratic Republic of the Congo", 
                                   ####################
                                   ## republic of the congo "Republic of the Congo", 
                                   ####################
                                   "Kouilou_Department", "Kouilou", "Republic of the Congo", 
                                   "Niari_Department", "Niari", "Republic of the Congo", 
                                   "Lékoumou_Department", "Lékoumou", "Republic of the Congo", 
                                   "Bouenza_Department", "Bouenza", "Republic of the Congo", 
                                   "Pool_Department", "Pool", "Republic of the Congo", 
                                   "Plateaux_Department_(Republic_of_the_Congo)", "Plateaux", "Republic of the Congo", 
                                   "Cuvette_Department", "Cuvette", "Republic of the Congo", 
                                   "Cuvette-Ouest_Department", "Cuvette-Ouest", "Republic of the Congo", 
                                   "Sangha_Department_(Republic_of_the_Congo)", "Sangha", "Republic of the Congo", 
                                   "Likouala_Department", "Likouala", "Republic of the Congo", 
                                   "Brazzaville", "Brazzaville", "Republic of the Congo", 
                                   "Pointe-Noire", "Pointe Noire", "Republic of the Congo", 
                                   ####################
                                   ## ivory coast "Côte d'Ivoire", 
                                   ####################
                                   "Abidjan", "Abidjan", "Côte d'Ivoire", 
                                   "Bas-Sassandra_District", "Bas-Sassandra", "Côte d'Ivoire", 
                                   "Comoé_District", "Comoé", "Côte d'Ivoire", 
                                   "Denguélé_District", "Denguélé", "Côte d'Ivoire", 
                                   "Gôh-Djiboua_District", "Gôh-Djiboua", "Côte d'Ivoire", 
                                   "Lacs_District", "Lacs", "Côte d'Ivoire", 
                                   "Lagunes_District", "Lagunes", "Côte d'Ivoire", 
                                   "Montagnes_District", "Montagnes", "Côte d'Ivoire", 
                                   "Sassandra-Marahoué_District", "Sassandra-Marahoué", "Côte d'Ivoire", 
                                   "Savanes_District", "Savanes", "Côte d'Ivoire", 
                                   "Vallée_du_Bandama_District", "Vallée du Bandama", "Côte d'Ivoire", 
                                   "Woroba_District", "Woroba", "Côte d'Ivoire", 
                                   "Yamoussoukro", "Yamoussoukro", "Côte d'Ivoire", 
                                   "Zanzan_District", "Zanzan", "Côte d'Ivoire", 
                                   ####################
                                   ## djibouti "Djibouti", 
                                   ####################
                                   "Djibouti_Region", "Djiboutii", "Djibouti", 
                                   "Ali_Sabieh_Region", "Ali Sabieh", "Djibouti", 
                                   "Arta_Region", "Arta", "Djibouti", 
                                   "Dikhil_Region", "Dikhil", "Djibouti", 
                                   "Tadjourah_Region", "Tadjoura", "Djibouti", 
                                   "Obock_Region", "Obock", "Djibouti",
                                   ####################
                                   ## egypt "Egypt", 
                                   ####################
                                   "Alexandria_Governorate", "Al Iskandariyah", "Egypt", 
                                   "Aswan_Governorate", "Aswan", "Egypt", 
                                   "Asyut_Governorate", "Asyut", "Egypt", 
                                   "Beheira_Governorate", "Al Buhayrah", "Egypt", 
                                   "Beni_Suef_Governorate", "Bani Suwayf", "Egypt", 
                                   "Cairo_Governorate", "Al Qahirah", "Egypt", 
                                   "Dakahlia_Governorate", "Ad Daqahliyah", "Egypt", 
                                   "Damietta_Governorate", "Dumyat", "Egypt", 
                                   "Faiyum_Governorate", "Al Fayyum", "Egypt", 
                                   "Gharbia_Governorate", "Al Gharbiyah", "Egypt", 
                                   "Giza_Governorate", "Al Jizah", "Egypt", 
                                   "Ismailia_Governorate", "Al Isma`iliyah", "Egypt", 
                                   "Kafr_El_Sheikh_Governorate", "Kafr ash Shaykh", "Egypt", 
                                   "Luxor_Governorate", "Al Uqsur", "Egypt", 
                                   "Matrouh_Governorate", "Matrouh", "Egypt", 
                                   "Minya_Governorate", "Al Minya", "Egypt", 
                                   "Monufia_Governorate", "Al Minufiyah", "Egypt", 
                                   "New_Valley_Governorate", "Al Wadi al Jadid", "Egypt", 
                                   ## missing from the global gadm dataset (for some reason),
                                   ## but present in gadm's individual-country data
                                   "North_Sinai_Governorate", "Shamal Sina'", "Egypt",
                                   "Port_Said_Governorate", "Bur Sa`id", "Egypt", 
                                   "Qalyubiyya_Governorate", "Al Qalyubiyah", "Egypt", 
                                   "Qena_Governorate", "Qina", "Egypt", 
                                   "Red_Sea_Governorate", "Al Bahr al Ahmar", "Egypt", 
                                   "Sharqia_Governorate", "Ash Sharqiyah", "Egypt", 
                                   "Sohag_Governorate", "Suhaj", "Egypt", 
                                   "South_Sinai_Governorate", "Janub Sina'", "Egypt", 
                                   "Suez_Governorate", "As Suways", "Egypt", 
                                   ####################
                                   ## equatorial guinea "Equatorial Guinea", 
                                   ####################
                                   "Annobón", "Annobón", "Equatorial Guinea", 
                                   "Bioko_Norte", "Bioko Norte", "Equatorial Guinea", 
                                   "Bioko_Sur", "Bioko Sur", "Equatorial Guinea", 
                                   "Centro_Sur", "Centro Sur", "Equatorial Guinea", 
                                   "Kié-Ntem", "Kié-Ntem", "Equatorial Guinea", 
                                   "Litoral_(Equatorial_Guinea)", "Litoral", "Equatorial Guinea", 
                                   "Wele-Nzas", "Wele-Nzas", "Equatorial Guinea", 
                                   ## not in gadm
                                   # "Djibloho"
                                   ####################
                                   ## eritrea "Eritrea", 
                                   ####################
                                   "Maekel_Region", "Maekel", "Eritrea", 
                                   "Anseba_Region", "Anseba", "Eritrea", 
                                   "Gash-Barka_Region", "Gash Barka", "Eritrea", 
                                   "Debub_Region", "Debub", "Eritrea", 
                                   "Northern_Red_Sea_Region", "Semenawi Keyih Bahri", "Eritrea", 
                                   "Southern_Red_Sea_Region", "Debubawi Keyih Bahri", "Eritrea", 
                                   ####################
                                   ## eswatini "Swaziland", 
                                   ####################
                                   "Hhohho_Region", "Hhohho", "Swaziland", 
                                   "Manzini_Region", "Manzini", "Swaziland", 
                                   "Lubombo_Region", "Lubombo", "Swaziland", 
                                   "Shiselweni_Region", "Shiselweni", "Swaziland", 
                                   ####################
                                   ## ethiopia "Ethiopia", 
                                   ####################
                                   "Addis_Ababa", "Addis Abeba", "Ethiopia", 
                                   "Afar_Region", "Afar", "Ethiopia", 
                                   "Amhara_Region", "Amhara", "Ethiopia", 
                                   "Benishangul-Gumuz_Region", "Benshangul-Gumaz", "Ethiopia", 
                                   "Dire_Dawa", "Dire Dawa", "Ethiopia", 
                                   "Gambela_Region", "Gambela Peoples", "Ethiopia", 
                                   "Harari_Region", "Harari People", "Ethiopia", 
                                   "Oromia_Region", "Oromia", "Ethiopia", 
                                   ## not in gadm
                                   # "Sidama_Region", 
                                   "Somali_Region", "Somali", "Ethiopia", 
                                   ## not in gadm
                                   # "South_West_Ethiopia_Peoples%27_Region", # see if South_West_Ethiopia_Peoples'_Region
                                   "Southern_Nations,_Nationalities,_and_Peoples%27_Region", "Southern Nations, Nationalities", "Ethiopia", # see if Southern_Nations,_Nationalities,_and_Peoples'_Region
                                   "Tigray_Region", "Tigray", "Ethiopia", 
                                   ####################
                                   ## gabon "Gabon", 
                                   ####################
                                   "Estuaire_Province", "Estuaire", "Gabon", 
                                   "Haut-Ogooué_Province", "Haut-Ogooué", "Gabon", 
                                   "Moyen-Ogooué_Province", "Moyen-Ogooué", "Gabon", 
                                   "Ngounié_Province", "Ngounié", "Gabon", 
                                   "Nyanga_Province", "Nyanga", "Gabon", 
                                   "Ogooué-Ivindo_Province", "Ogooué-Ivindo", "Gabon", 
                                   "Ogooué-Lolo_Province","Ogooué-Lolo", "Gabon", 
                                   "Ogooué-Maritime_Province", "Ogooué-Maritime", "Gabon", 
                                   "Woleu-Ntem", "Wouleu-Ntem", "Gabon",
                                   ####################
                                   ## gambia "Gambia", 
                                   ####################
                                   "Banjul", "Banjul", "Gambia", 
                                   ## central river division is not in gadm; gadm uses its former name, Maccarthy Island, instead
                                   "Central_River_Division", "Maccarthy Island", "Gambia", 
                                   "Lower_River_Division", "Lower River", "Gambia", 
                                   "North_Bank_Division", "North Bank", "Gambia", 
                                   "Upper_River_Division", "Upper River", "Gambia", 
                                   "West_Coast_Division_(Gambia)", "Western", "Gambia",
                                   ####################
                                   ## ghana "Ghana", 
                                   ####################
                                   "Ashanti_Region", "Ashanti", "Ghana", 
                                   "Bono_Region", "Bono", "Ghana", 
                                   "Bono_East_Region", "Bono East", "Ghana", 
                                   "Ahafo_Region", "Ahafo", "Ghana", 
                                   "Central_Region_(Ghana)", "Central", "Ghana", 
                                   "Eastern_Region_(Ghana)", "Eastern", "Ghana", 
                                   "Greater_Accra_Region", "Greater Accra", "Ghana", 
                                   "Northern_Region,_Ghana", "Northern", "Ghana", 
                                   "Savannah_Region", "Savannah", "Ghana", 
                                   "North_East_Region,_Ghana", "North East", "Ghana", 
                                   "Upper_East_Region", "Upper East", "Ghana", 
                                   "Upper_West_Region", "Upper West", "Ghana", 
                                   "Volta_Region", "Volta", "Ghana", 
                                   "Oti_Region", "Oti", "Ghana", 
                                   "Western_Region,_Ghana", "Western", "Ghana", 
                                   "Western_North_Region", "Western North", "Ghana", 
                                   ####################
                                   ## guinea "Guinea", 
                                   ####################
                                   "Boké_Region", "Boké", "Guinea", 
                                   "Conakry_Region", "Conakry", "Guinea", 
                                   "Faranah_Region", "Faranah", "Guinea", 
                                   "Kankan_Region", "Kankan", "Guinea", 
                                   "Kindia_Region", "Kindia", "Guinea", 
                                   "Labé_Region", "Labé", "Guinea", 
                                   "Mamou_Region", "Mamou", "Guinea", 
                                   "Nzérékoré_Region", "Nzérékoré", "Guinea", 
                                   ####################
                                   ## guinea bissau "Guinea-Bissau", 
                                   ####################
                                   "Bafatá_Region", "Bafatá", "Guinea-Bissau", 
                                   "Biombo_Region", "Biombo", "Guinea-Bissau", 
                                   "Bissau_Region", "Bissau", "Guinea-Bissau", 
                                   "Bolama_Region", "Bolama", "Guinea-Bissau", 
                                   "Cacheu_Region", "Cacheu", "Guinea-Bissau", 
                                   "Gabu_Region", "Gabú", "Guinea-Bissau", 
                                   "Oio_Region", "Oio", "Guinea-Bissau", 
                                   "Quinara_Region", "Quinara", "Guinea-Bissau", 
                                   "Tombali_Region", "Tombali", "Guinea-Bissau", 
                                   ####################
                                   ## kenya "Kenya", 
                                   ####################
                                   "Mombasa_County", "Mombasa", "Kenya", 
                                   "Kwale_County", "Kwale", "Kenya", 
                                   "Kilifi_County", "Kilifi", "Kenya", 
                                   "Tana_River_County", "Tana River", "Kenya", 
                                   "Lamu_County", "Lamu", "Kenya", 
                                   "Taita–Taveta_County", "Taita Taveta", "Kenya", 
                                   "Garissa_County", "Garissa", "Kenya", 
                                   "Wajir_County", "Wajir", "Kenya", 
                                   "Mandera_County", "Mandera", "Kenya", 
                                   "Marsabit_County", "Marsabit", "Kenya", 
                                   "Isiolo_County", "Isiolo", "Kenya", 
                                   "Meru_County", "Meru", "Kenya", 
                                   "Tharaka-Nithi_County", "Tharaka-Nithi", "Kenya", 
                                   "Embu_County", "Embu", "Kenya", 
                                   "Kitui_County", "Kitui", "Kenya", 
                                   "Machakos_County", "Machakos", "Kenya", 
                                   "Makueni_County", "Makueni", "Kenya", 
                                   "Nyandarua_County", "Nyandarua", "Kenya", 
                                   "Nyeri_County", "Nyeri", "Kenya", 
                                   "Kirinyaga_County", "Kirinyaga", "Kenya", 
                                   "Muranga_County", "Murang'a", "Kenya", 
                                   "Kiambu_County", "Kiambu", "Kenya", 
                                   "Turkana_County", "Turkana", "Kenya", 
                                   "West_Pokot_County", "West Pokot", "Kenya", 
                                   "Samburu_County", "Samburu", "Kenya", 
                                   "Trans-Nzoia_County", "Trans Nzoia", "Kenya", 
                                   "Uasin_Gishu_County", "Uasin Gishu", "Kenya", 
                                   "Elgeyo-Marakwet_County", "Elgeyo-Marakwet", "Kenya", 
                                   "Nandi_County", "Nandi", "Kenya", 
                                   "Baringo_County", "Baringo", "Kenya", 
                                   "Laikipia_County", "Laikipia", "Kenya", 
                                   "Nakuru_County", "Nakuru", "Kenya", 
                                   "Narok_County", "Narok", "Kenya", 
                                   "Kajiado_County", "Kajiado", "Kenya", 
                                   "Kericho_County", "Kericho", "Kenya", 
                                   "Bomet_County", "Bomet", "Kenya", 
                                   "Kakamega_County", "Kakamega", "Kenya", 
                                   "Vihiga_County", "Vihiga", "Kenya", 
                                   "Bungoma_County", "Bungoma", "Kenya", 
                                   "Busia_County", "Busia", "Kenya", 
                                   "Siaya_County", "Siaya", "Kenya", 
                                   "Kisumu_County", "Kisumu", "Kenya", 
                                   "Homa_Bay_County", "Homa Bay", "Kenya", 
                                   "Migori_County", "Migori", "Kenya", 
                                   "Kisii_County", "Kisii", "Kenya", 
                                   "Nyamira_County", "Nyamira", "Kenya", 
                                   "Nairobi_County", "Nairobi", "Kenya", 
                                   ####################
                                   ## lesotho "Lesotho", 
                                   ####################
                                   "Berea_District", "Berea", "Lesotho", 
                                   "Butha-Buthe_District", "Butha-Buthe", "Lesotho", 
                                   "Leribe_District", "Leribe", "Lesotho", 
                                   "Mafeteng_District", "Mafeteng", "Lesotho", 
                                   "Maseru_District", "Maseru", "Lesotho", 
                                   "Mohale%27s_Hoek_District", "Mohale's Hoek", "Lesotho", # see if Mohale's_Hoek_District
                                   "Mokhotlong_District", "Mokhotlong", "Lesotho", 
                                   "Qacha%27s_Nek_District", "Qacha's Nek", "Lesotho", # see if Qacha's_Nek_District
                                   "Quthing_District", "Quthing", "Lesotho", 
                                   "Thaba-Tseka_District", "Thaba-Tseka", "Lesotho", 
                                   ####################
                                   ## liberia "Liberia", 
                                   ####################
                                   "Bomi_County", "Bomi", "Liberia", 
                                   "Bong_County", "Bong", "Liberia", 
                                   "Gbarpolu_County", "Gbapolu", "Liberia", 
                                   "Grand_Bassa_County", "Grand Bassa", "Liberia", 
                                   "Grand_Cape_Mount_County", "Grand Cape Mount", "Liberia", 
                                   "Grand_Gedeh_County", "Grand Gedeh", "Liberia", 
                                   "Grand_Kru_County", "Grand Kru", "Liberia", 
                                   "Lofa_County", "Lofa", "Liberia", 
                                   "Margibi_County", "Margibi", "Liberia", 
                                   "Maryland_County", "Maryland", "Liberia", 
                                   "Montserrado_County", "Montserrado", "Liberia", 
                                   "Nimba_County", "Nimba", "Liberia", 
                                   "Rivercess_County", "Rivercess", "Liberia", 
                                   "River_Gee_County", "River Gee", "Liberia", 
                                   "Sinoe_County", "Sinoe", "Liberia", 
                                   ####################
                                   ## libya "Libya", 
                                   ####################
                                   ## each row: wikipedia page slug, gadm level-1 name, country
                                   "Butnan_District", "Al Butnan", "Libya", 
                                   "Derna_District", "Darnah", "Libya", 
                                   "Al_Jabal_al_Akhdar", "Al Jabal al Akhdar", "Libya", 
                                   "Marj_District", "Al Marj", "Libya", 
                                   "Benghazi", "Benghazi", "Libya", 
                                   "Al_Wahat_District", "Al Wahat", "Libya", 
                                   "Kufra_District", "Al Kufrah", "Libya", 
                                   "Sirte_District", "Surt", "Libya", 
                                   "Misrata_District", "Misratah", "Libya", 
                                   ## Murqub (north-west coast) is gadm's "Al Marqab";
                                   ## do not confuse with Murzuq (far south-west) further down
                                   "Murqub_District", "Al Marqab", "Libya", 
                                   "Tripoli_District,_Libya", "Tripoli", "Libya", 
                                   "Jafara", "Al Jifarah", "Libya", 
                                   "Zawiya_District", "Az Zawiyah", "Libya", 
                                   "An_Nuqat_al_Khams", "An Nuqat al Khams", "Libya", 
                                   "Jabal_al_Gharbi_District", "Al Jabal al Gharbi", "Libya", 
                                   "Nalut_District", "Nalut", "Libya", 
                                   "Jufra_District", "Al Jufrah", "Libya", 
                                   "Wadi_al_Shatii_District", "Wadi ash Shati'", "Libya", 
                                   "Sabha_District", "Sabha", "Libya", 
                                   "Wadi_al_Hayaa_District", "Wadi al Hayat", "Libya", 
                                   "Ghat_District", "Ghat", "Libya", 
                                   ## Murzuq keeps the same name in gadm
                                   "Murzuq_District", "Murzuq", "Libya", 
                                   ####################
                                   ## madagascar "Madagascar", 
                                   ####################
                                   "Antsiranana_Province", "Antsiranana", "Madagascar", 
                                   "Antananarivo_Province", "Antananarivo", "Madagascar", 
                                   "Mahajanga_Province", "Mahajanga", "Madagascar", 
                                   "Toamasina_Province", "Toamasina", "Madagascar", 
                                   "Fianarantsoa_Province", "Fianarantsoa", "Madagascar", 
                                   "Toliara_Province", "Toliary", "Madagascar",
                                   ####################
                                   ## malawi "Malawi", 
                                   ####################
                                   "Dedza_District","Dedza", "Malawi", 
                                   "Dowa_District", "Dowa", "Malawi", 
                                   "Kasungu_District", "Kasungu", "Malawi", 
                                   "Lilongwe_District", "Lilongwe", "Malawi", 
                                   "Mchinji_District", "Mchinji", "Malawi", 
                                   "Nkhotakota_District", "Nkhotakota", "Malawi", 
                                   "Ntcheu_District", "Ntcheu", "Malawi", 
                                   "Ntchisi_District", "Ntchisi", "Malawi", 
                                   "Salima_District", "Salima", "Malawi", 
                                   "Chitipa_District", "Chitipa", "Malawi", 
                                   "Karonga_District", "Karonga", "Malawi", 
                                   "Likoma_District", "Likoma", "Malawi", 
                                   "Mzimba_District", "Mzimba", "Malawi", 
                                   "Nkhata_Bay_District", "Nkhata Bay", "Malawi", 
                                   "Rumphi_District", "Rumphi", "Malawi", 
                                   "Balaka_District", "Balaka", "Malawi", 
                                   "Blantyre_District", "Blantyre", "Malawi", 
                                   "Chikwawa_District", "Chikwawa", "Malawi", 
                                   "Chiradzulu_District",  "Chiradzulu", "Malawi", 
                                   "Machinga_District", "Machinga", "Malawi", 
                                   "Mangochi_District", "Mangochi", "Malawi", 
                                   "Mulanje_District", "Mulanje", "Malawi", 
                                   "Mwanza_District", "Mwanza", "Malawi", 
                                   "Nsanje_District", "Nsanje", "Malawi", 
                                   "Thyolo_District", "Thyolo", "Malawi", 
                                   "Phalombe_District", "Phalombe", "Malawi", 
                                   "Zomba_District", "Zomba", "Malawi", 
                                   "Neno_District", "Neno", "Malawi", 
                                   ####################
                                   ## mali "Mali", 
                                   ####################
                                   "Kayes_Region", "Kayes", "Mali", 
                                   "Koulikoro_Region", "Koulikoro", "Mali", 
                                   "Bamako", "Bamako", "Mali", 
                                   "Sikasso_Region", "Sikasso", "Mali", 
                                   "Ségou_Region", "Ségou", "Mali", 
                                   "Mopti_Region", "Mopti", "Mali", 
                                   "Tombouctou_Region", "Timbuktu", "Mali", 
                                   "Gao_Region", "Gao", "Mali", 
                                   "Kidal_Region", "Kidal", "Mali", 
                                   ## not in gadm
                                   # "Taoudénit_Region",
                                   # "Ménaka_Region", 
                                   ####################
                                   ## mauritania "Mauritania", 
                                   ####################
                                   "Adrar_Region", "Adrar", "Mauritania", 
                                   "Assaba_Region", "Assaba", "Mauritania", 
                                   "Brakna_Region", "Brakna", "Mauritania", 
                                   "Dakhlet_Nouadhibou_Region", "Dakhlet Nouadhibou", "Mauritania", 
                                   "Gorgol_Region", "Gorgol", "Mauritania", 
                                   "Guidimaka_Region", "Guidimaka", "Mauritania", 
                                   "Hodh_Ech_Chargui_Region", "Hodh ech Chargui", "Mauritania", 
                                   "Hodh_El_Gharbi_Region", "Hodh el Gharbi", "Mauritania", 
                                   "Inchiri_Region", "Inchiri", "Mauritania", 
                                   ## not in gadm
                                   # "Nouakchott-Nord_Region", 
                                   # "Nouakchott-Ouest_Region",
                                   # "Nouakchott-Sud_Region",
                                   ## instead:
                                   "Nouakchott", "Nouakchott", "Mauritania", 
                                   "Tagant_Region", "Tagant", "Mauritania", 
                                   "Tiris_Zemmour_Region", "Tiris Zemmour", "Mauritania", 
                                   "Trarza_Region", "Trarza", "Mauritania", 
                                   ####################
                                   ## mauritius "Mauritius", 
                                   ####################
                                   "Agaléga", "Agalega Islands", "Mauritius", 
                                   "Rivière_Noire_District", "Black River", "Mauritius", 
                                   "Flacq_District", "Flacq", "Mauritius", 
                                   "Grand_Port_District", "Grand Port", "Mauritius", 
                                   "Moka_District", "Moka", "Mauritius", 
                                   "Pamplemousses_District", "Pamplemousses", "Mauritius", 
                                   "Plaines_Wilhems_District", "Plaines Wilhems", "Mauritius", 
                                   "Port_Louis_District", "Port Louis", "Mauritius", 
                                   "Rivière_du_Rempart_District", "Rivière du Rempart", "Mauritius", 
                                   "Rodrigues", "Rodriguez", "Mauritius", 
                                   "St._Brandon", "Saint Brandon", "Mauritius", 
                                   "Savanne_District", "Savanne", "Mauritius", 
                                   ####################
                                   ## morocco "Morocco", 
                                   ####################
                                   "Chaouia-Ouardigha", "Chaouia - Ouardigha", "Morocco", 
                                   "Doukkala-Abda", "Doukkala - Abda", "Morocco", 
                                   "F%C3%A8s-Boulemane", "Fès - Boulemane", "Morocco", 
                                   "Gharb-Chrarda-B%C3%A9ni_Hssen", "Gharb - Chrarda - Béni Hssen", "Morocco", 
                                   "Grand_Casablanca", "Grand Casablanca", "Morocco", 
                                   "Guelmim-Es_Semara", "Guelmim - Es-Semara", "Morocco", 
                                   "La%C3%A2youne-Boujdour-Sakia_El_Hamra", "Laâyoune - Boujdour - Sakia El H", "Morocco", 
                                   "Marrakech-Tensift-El_Haouz", "Marrakech - Tensift - Al Haouz", "Morocco", 
                                   "Mekn%C3%A8s-Tafilalet", "Meknès - Tafilalet", "Morocco", 
                                   "Oriental_(1997%E2%80%932015)", "Oriental", "Morocco", 
                                   "Rabat-Sal%C3%A9-Zemmour-Zaer", "Rabat - Salé - Zemmour - Zaer", "Morocco", 
                                   "Souss-Massa-Dr%C3%A2a", "Souss - Massa - Draâ", "Morocco", 
                                   "Tadla-Azilal", "Tadla - Azilal", "Morocco", 
                                   "Tangier-Tetouan", "Tanger - Tétouan", "Morocco", 
                                   "Taza-Al_Hoceima-Taounate", "Taza - Al Hoceima - Taounate", "Morocco", 
                                   ####################
                                   ## mozambique "Mozambique", 
                                   ####################
                                   "Cabo_Delgado_Province", "Cabo Delgado", "Mozambique", 
                                   "Gaza_Province", "Gaza", "Mozambique", 
                                   "Inhambane_Province", "Inhambane", "Mozambique", 
                                   "Manica_Province", "Manica", "Mozambique", 
                                   "Maputo_City", "Maputo City", "Mozambique", 
                                   "Maputo_Province", "Maputo", "Mozambique", 
                                   "Nampula_Province", "Nampula", "Mozambique", 
                                   "Niassa_Province", "Nassa", "Mozambique", 
                                   "Sofala_Province", "Sofala", "Mozambique", 
                                   "Tete_Province", "Tete", "Mozambique", 
                                   "Zambezia_Province", "Zambezia", "Mozambique", 
                                   ####################
                                   ## namibia "Namibia", 
                                   ####################
                                   "Kunene_Region", "Kunene", "Namibia", 
                                   "Omusati_Region", "Omusati", "Namibia", 
                                   "Oshana_Region", "Oshana", "Namibia", 
                                   "Ohangwena_Region", "Ohangwena", "Namibia", 
                                   "Oshikoto_Region", "Oshikoto", "Namibia", 
                                   ## not in gadm
                                   # "Kavango_West",
                                   # "Kavango_East",
                                   ## instead: 
                                   "Kavango_Region", "Kavango", "Namibia", 
                                   "Zambezi_Region", "Zambezi", "Namibia", 
                                   "Erongo_Region", "Erongo", "Namibia", 
                                   "Otjozondjupa_Region", "Otjozondjupa", "Namibia", 
                                   "Omaheke_Region", "Omaheke", "Namibia", 
                                   "Khomas_Region", "Khomas", "Namibia", 
                                   "Hardap_Region", "Hardap", "Namibia", 
                                   "%C7%81Karas_Region", "!Karas", "Namibia",  # see if ǁKaras_Region
                                   ####################
                                   ## niger "Niger", 
                                   ####################
                                   "Agadez_Region", "Agadez", "Niger", 
                                   "Diffa_Region", "Diffa", "Niger", 
                                   "Dosso_Region", "Dosso", "Niger", 
                                   "Maradi_Region", "Maradi", "Niger", 
                                   "Niamey", "Niamey", "Niger", 
                                   "Tahoua_Region", "Tahoua", "Niger", 
                                   "Tillabéri_Region", "Tillabéry", "Niger", 
                                   "Zinder_Region", "Zinder", "Niger", 
                                   ####################
                                   ## nigeria "Nigeria", 
                                   ####################
                                   "Abia_State", "Abia", "Nigeria", 
                                   "Adamawa_State", "Adamawa", "Nigeria", 
                                   "Akwa_Ibom_State", "Akwa Ibom", "Nigeria", 
                                   "Anambra_State", "Anambra", "Nigeria", 
                                   "Bauchi_State", "Bauchi", "Nigeria", 
                                   "Bayelsa_State", "Bayelsa", "Nigeria", 
                                   "Benue_State", "Benue", "Nigeria", 
                                   "Borno_State", "Borno", "Nigeria", 
                                   "Cross_River_State", "Cross River", "Nigeria", 
                                   "Delta_State", "Delta", "Nigeria", 
                                   "Ebonyi_State", "Ebonyi", "Nigeria", 
                                   "Edo_State", "Edo", "Nigeria", 
                                   "Ekiti_State", "Ekiti", "Nigeria", 
                                   "Enugu_State", "Enugu", "Nigeria", 
                                   "Gombe_State", "Gombe", "Nigeria", 
                                   "Imo_State", "Imo", "Nigeria", 
                                   "Jigawa_State", "Jigawa", "Nigeria", 
                                   "Kaduna_State", "Kaduna", "Nigeria", 
                                   "Kano_State", "Kano", "Nigeria", 
                                   "Katsina_State", "Katsina", "Nigeria", 
                                   "Kebbi_State", "Kebbi", "Nigeria", 
                                   "Kogi_State", "Kogi", "Nigeria", 
                                   "Kwara_State", "Kwara", "Nigeria", 
                                   "Lagos_State", "Lagos", "Nigeria", 
                                   "Nasarawa_State", "Nasarawa", "Nigeria", 
                                   "Niger_State", "Niger", "Nigeria", 
                                   "Ogun_State", "Ogun", "Nigeria", 
                                   "Ondo_State", "Ondo", "Nigeria", 
                                   "Osun_State", "Osun", "Nigeria", 
                                   "Oyo_State", "Oyo", "Nigeria", 
                                   "Plateau_State", "Plateau", "Nigeria", 
                                   "Rivers_State", "Rivers", "Nigeria", 
                                   "Sokoto_State", "Sokoto", "Nigeria", 
                                   "Taraba_State", "Taraba", "Nigeria", 
                                   "Yobe_State", "Yobe", "Nigeria", 
                                   "Zamfara_State", "Zamfara", "Nigeria", 
                                   "Federal_Capital_Territory_(Nigeria)", "Federal Capital Territory", "Nigeria", 
                                   ####################
                                   ## rwanda "Rwanda", 
                                   ####################
                                   "Kigali_Province,_Rwanda", "Umujyi wa Kigali", "Rwanda", 
                                   "Southern_Province,_Rwanda", "Amajyepfo", "Rwanda", 
                                   "Western_Province,_Rwanda", "Iburengerazuba", "Rwanda", 
                                   "Northern_Province,_Rwanda", "Amajyaruguru", "Rwanda", 
                                   "Eastern_Province,_Rwanda", "Iburasirazuba", "Rwanda", 
                                   ####################
                                   ## sao tome and principe "São Tomé and Príncipe", 
                                   ####################
                                   "São_Tomé_Island", "São Tomé", "São Tomé and Príncipe", 
                                   "Autonomous_Region_of_Príncipe", "Príncipe", "São Tomé and Príncipe", 
                                   ####################
                                   ## senegal "Senegal", 
                                   ####################
                                   "Dakar_Region", "Dakar", "Senegal", 
                                   "Ziguinchor_Region", "Ziguinchor", "Senegal", 
                                   "Diourbel_Region", "Diourbel", "Senegal", 
                                   "Saint-Louis_Region", "Saint-Louis", "Senegal", 
                                   "Tambacounda_Region", "Tambacounda", "Senegal", 
                                   "Kaolack_Region", "Kaolack", "Senegal", 
                                   "Thiès_Region", "Thiès", "Senegal", 
                                   "Louga_Region", "Louga", "Senegal", 
                                   "Fatick_Region", "Fatick", "Senegal", 
                                   "Kolda_Region", "Kolda", "Senegal", 
                                   "Matam_Region", "Matam", "Senegal", 
                                   "Kaffrine_Region", "Kaffrine", "Senegal", 
                                   "Kédougou_Region", "Kédougou", "Senegal", 
                                   "Sédhiou_Region", "Sédhiou", "Senegal", 
                                   ####################
                                   ## seychelles "Seychelles", 
                                   ####################
                                   "Anse_aux_Pins", "Anse aux Pins", "Seychelles", 
                                   "Anse_Boileau", "Anse Boileau", "Seychelles", 
                                   "Anse_Etoile", "Anse Étoile", "Seychelles", 
                                   "Au_Cap", "Au Cap", "Seychelles", 
                                   "Anse_Royale", "Anse Royale", "Seychelles", 
                                   "Baie_Lazare", "Baie Lazare", "Seychelles", 
                                   "Beau_Vallon,_Seychelles", "Beau Vallon", "Seychelles", 
                                   "Bel_Air,_Seychelles", "Bel Air", "Seychelles", 
                                   "Bel_Ombre,_Seychelles", "Belombre", "Seychelles", 
                                   "Cascade,_Seychelles", "Cascade", "Seychelles", 
                                   "Glacis,_Seychelles", "Glacis", "Seychelles", 
                                   "Grand%27Anse_Mahé", "Grand' Anse", "Seychelles", # see if Grand'Anse_Mahé
                                   "English_River,_Seychelles", "English River", "Seychelles", 
                                   "Mont_Buxton", "Mont Buxton", "Seychelles", 
                                   "Mont_Fleuri", "Mont Fleuri", "Seychelles", 
                                   "Plaisance,_Seychelles", "Plaisance", "Seychelles", 
                                   "Pointe_La_Rue", "Pointe La Rue", "Seychelles", 
                                   "Port_Glaud", "Port Glaud", "Seychelles", 
                                   "Saint_Louis,_Seychelles", "Saint Louis", "Seychelles", 
                                   "Takamaka,_Seychelles", "Takamaka", "Seychelles", 
                                   "Les_Mamelles", "Les Mamelles", "Seychelles", 
                                   "Roche_Caiman", "Roche Caïman", "Seychelles", 
                                   "Baie_Sainte_Anne", "Baie Sainte Anne", "Seychelles", 
                                   "Grand%27Anse_Praslin", "Grand'Anse Praslin", "Seychelles", # see if Grand'Anse_Praslin
                                   "La_Digue_and_Inner_Islands", "La Digue and Inner Islands", "Seychelles", 
                                   "Outer_Islands_(Seychelles)", "Outer Islands", "Seychelles", 
                                   ####################
                                   ## sierra leone "Sierra Leone", 
                                   ####################
                                   ## each row: wikipedia page slug, gadm level-1 name, country
                                   "Eastern_Province,_Sierra_Leone", "Eastern", "Sierra Leone", 
                                   "Northern_Province,_Sierra_Leone", "Northern", "Sierra Leone", 
                                   "Southern_Province,_Sierra_Leone", "Southern", "Sierra Leone", 
                                   ## gadm's "Western" is the Western Area (Freetown peninsula).
                                   ## North West Province (split from Northern in 2017) is a different
                                   ## unit and is not in gadm, so it must not be paired with "Western".
                                   "Western_Area", "Western", "Sierra Leone",
                                   ####################
                                   ## somalia "Somalia", 
                                   ####################
                                   "Awdal", "Awdal", "Somalia", 
                                   "Bakool", "Bakool", "Somalia", 
                                   "Banaadir", "Banaadir", "Somalia", 
                                   "Bari,_Somalia", "Bari", "Somalia", 
                                   "Bay,_Somalia", "Bay", "Somalia", 
                                   "Galguduud", "Galguduud", "Somalia", 
                                   "Gedo", "Gedo", "Somalia", 
                                   "Hiran,_Somalia", "Hiiraan", "Somalia", 
                                   "Middle_Juba", "Jubbada Dhexe", "Somalia", 
                                   "Lower_Juba", "Jubbada Hoose", "Somalia", 
                                   "Mudug", "Mudug", "Somalia", 
                                   "Nugal,_Somalia", "Nugaal", "Somalia", 
                                   "Sanaag", "Sanaag", "Somalia", 
                                   "Middle_Shabelle", "Shabeellaha Dhexe", "Somalia", # dhexe
                                   "Lower_Shabelle", "Shabeellaha Hoose", "Somalia", # hoose
                                   "Sool,_Somalia", "Sool", "Somalia", 
                                   "Togdheer", "Togdheer", "Somalia", 
                                   "Woqooyi_Galbeed", "Woqooyi Galbeed", "Somalia", 
                                   ####################
                                   ## south africa "South Africa", 
                                   ####################
                                   "Eastern_Cape", "Eastern Cape", "South Africa", 
                                   "Free_State_(province)", "Free State", "South Africa", 
                                   "Gauteng", "Gauteng", "South Africa", 
                                   "KwaZulu-Natal", "KwaZulu-Natal", "South Africa", 
                                   "Limpopo", "Limpopo", "South Africa", 
                                   "Mpumalanga", "Mpumalanga", "South Africa", 
                                   "North_West_(South_African_province)", "North West", "South Africa", 
                                   "Northern_Cape", "Northern Cape", "South Africa", 
                                   "Western_Cape", "Western Cape", "South Africa", 
                                   ####################
                                   ## south sudan "South Sudan", 
                                   ####################
                                   "Northern_Bahr_el_Ghazal", "North Bahr-al-Ghazal", "South Sudan", 
                                   "Western_Bahr_el_Ghazal", "West Bahr-al-Ghazal", "South Sudan", 
                                   "Lakes_(state)", "Lakes", "South Sudan", 
                                   "Warrap_(state)", "Warap", "South Sudan", 
                                   "Western_Equatoria", "West Equatoria", "South Sudan", 
                                   "Central_Equatoria", "Central Equatoria", "South Sudan", 
                                   "Eastern_Equatoria", "Eastern Equatoria", "South Sudan", 
                                   "Jonglei", "Jungoli", "South Sudan", 
                                   "Unity_(state)", "Unity", "South Sudan", 
                                   "Upper_Nile_(state)", "Upper Nile", "South Sudan", 
                                   ####################
                                   ## sudan "Sudan", 
                                   ####################
                                   "Khartoum_(state)", "Khartoum", "Sudan", 
                                   "North_Kordofan", "North Kurdufan", "Sudan", 
                                   "Northern_(state)", "Northern", "Sudan", 
                                   "Kassala_(state)", "Kassala", "Sudan", 
                                   "Blue_Nile_(state)", "Blue Nile", "Sudan", 
                                   "North_Darfur", "North Darfur", "Sudan", 
                                   "South_Darfur", "South Darfur", "Sudan", 
                                   "South_Kordofan", "South Kurdufan", "Sudan", 
                                   "Al_Jazirah_(state)", "Al Jazirah", "Sudan", 
                                   "White_Nile_(state)", "White Nile", "Sudan", 
                                   "River_Nile_(state)", "River Nile", "Sudan", 
                                   "Red_Sea_(state)", "Red Sea", "Sudan", 
                                   "Al_Qadarif_(state)", "Al Qadarif", "Sudan", 
                                   "Sennar_(state)", "Sennar", "Sudan", 
                                   "West_Darfur", "West Darfur", "Sudan", 
                                   "Central_Darfur", "Central Darfur", "Sudan", 
                                   "East_Darfur", "East Darfur", "Sudan", 
                                   "West_Kordofan", "West Kurdufan", "Sudan", 
                                   ####################
                                   ## tanzania "Tanzania", 
                                   ####################
                                   "Arusha_Region", "Arusha", "Tanzania", 
                                   "Dar_es_Salaam_Region", "Dar es Salaam", "Tanzania", 
                                   "Dodoma_Region", "Dodoma", "Tanzania", 
                                   "Geita_Region", "Geita", "Tanzania", 
                                   "Iringa_Region", "Iringa", "Tanzania", 
                                   "Kagera_Region", "Kagera", "Tanzania", 
                                   "Katavi_Region", "Katavi", "Tanzania", 
                                   "Kigoma_Region", "Kigoma", "Tanzania", 
                                   "Kilimanjaro_Region", "Kilimanjaro", "Tanzania", 
                                   "Lindi_Region", "Lindi", "Tanzania", 
                                   "Manyara_Region", "Manyara", "Tanzania", 
                                   "Mara_Region", "Mara", "Tanzania", 
                                   "Mbeya_Region", "Mbeya", "Tanzania", 
                                   "Mjini_Magharibi_Region", "Mjini Magharibi", "Tanzania", 
                                   "Morogoro_Region", "Morogoro", "Tanzania", 
                                   "Mtwara_Region", "Mtwara", "Tanzania", 
                                   "Mwanza_Region", "Mwanza", "Tanzania", 
                                   "Njombe_Region", "Njombe", "Tanzania", 
                                   "Pemba_North_Region", "Kaskazini Pemba", "Tanzania", 
                                   "Pemba_South_Region", "Kaskazini Unguja", "Tanzania", 
                                   "Pwani_Region", "Pwani", "Tanzania", 
                                   "Rukwa_Region", "Rukwa", "Tanzania", 
                                   "Ruvuma_Region", "Ruvuma", "Tanzania", 
                                   "Shinyanga_Region", "Shinyanga", "Tanzania", 
                                   "Simiyu_Region", "Simiyu", "Tanzania", 
                                   "Singida_Region", "Singida", "Tanzania", 
                                   "Songwe_Region", "Songwe", "Tanzania", 
                                   "Tabora_Region", "Tabora", "Tanzania", 
                                   "Tanga_Region", "Tanga", "Tanzania", 
                                   "Unguja_North_Region", "Kusini Pemba", "Tanzania", 
                                   "Unguja_South_Region", "Kusini Unguja", "Tanzania", 
                                   ####################
                                   ## togo "Togo", 
                                   ####################
                                   "Centrale_Region,_Togo", "Centre", "Togo", 
                                   "Kara_Region", "Kara", "Togo", 
                                   "Maritime_Region", "Maritime", "Togo", 
                                   "Plateaux_Region,_Togo", "Plateaux", "Togo", 
                                   "Savanes_Region,_Togo", "Savanes", "Togo", 
                                   ####################
                                   ## tunisia "Tunisia", 
                                   ####################
                                   "Ariana_Governorate", "Ariana", "Tunisia", 
                                   "Béja_Governorate", "Béja", "Tunisia", 
                                   "Ben_Arous_Governorate", "Ben Arous (Tunis Sud)", "Tunisia", 
                                   "Bizerte_Governorate", "Bizerte", "Tunisia", 
                                   "Gabès_Governorate", "Gabès", "Tunisia", 
                                   "Gafsa_Governorate", "Gafsa", "Tunisia", 
                                   "Jendouba_Governorate", "Jendouba", "Tunisia", 
                                   "Kairouan_Governorate", "Kairouan", "Tunisia", 
                                   "Kasserine_Governorate", "Kassérine", "Tunisia", 
                                   "Kebili_Governorate", "Kebili", "Tunisia", 
                                   "Kef_Governorate", "Le Kef", "Tunisia", 
                                   "Mahdia_Governorate", "Mahdia", "Tunisia", 
                                   "Manouba_Governorate", "Manubah", "Tunisia", 
                                   "Medenine_Governorate", "Médenine", "Tunisia", 
                                   "Monastir_Governorate", "Monastir", "Tunisia", 
                                   "Nabeul_Governorate", "Nabeul", "Tunisia", 
                                   "Sfax_Governorate", "Sfax", "Tunisia", 
                                   "Sidi_Bouzid_Governorate", "Sidi Bou Zid", "Tunisia", 
                                   "Siliana_Governorate", "Siliana", "Tunisia", 
                                   "Sousse_Governorate", "Sousse", "Tunisia", 
                                   "Tataouine_Governorate", "Tataouine", "Tunisia", 
                                   "Tozeur_Governorate", "Tozeur", "Tunisia", 
                                   "Tunis_Governorate", "Tunis", "Tunisia", 
                                   "Zaghouan_Governorate", "Zaghouan", "Tunisia", 
                                   ####################
                                   ## uganda "Uganda", 
                                   ####################
                                   "Adjumani_District", "Adjumani", "Uganda", 
                                   "Apac_District", "Apac", "Uganda", 
                                   "Arua_District", "Arua", "Uganda", 
                                   "Bugiri_District", "Bugiri", "Uganda", 
                                   "Bundibugyo_District", "Bundibugyo", "Uganda", 
                                   "Bushenyi_District", "Bushenyi", "Uganda", 
                                   "Busia_District,_Uganda", "Busia", "Uganda", 
                                   "Gulu_District", "Gulu", "Uganda", 
                                   "Hoima_District", "Hoima", "Uganda", 
                                   "Iganga_District", "Iganga", "Uganda", 
                                   "Jinja_District", "Jinja", "Uganda", 
                                   "Kabale_District", "Kabale", "Uganda", 
                                   "Kabarole_District", "Kabarole", "Uganda", 
                                   "Kaberamaido_District", "Kaberamaido", "Uganda", 
                                   "Kalangala_District", "Kalangala", "Uganda", 
                                   "Kampala", "Kampala", "Uganda", 
                                   "Kamuli_District", "Kamuli", "Uganda", 
                                   "Kamwenge_District", "Kamwenge", "Uganda", 
                                   "Kanungu_District", "Kanungu", "Uganda", 
                                   "Kapchorwa_District", "Kapchorwa", "Uganda", 
                                   "Kasese_District", "Kasese", "Uganda", 
                                   "Katakwi_District", "Katakwi", "Uganda", 
                                   "Kayunga_District", "Kayunga", "Uganda", 
                                   "Kibale_District", "Kibale", "Uganda", 
                                   "Kiboga_District", "Kiboga", "Uganda", 
                                   "Kisoro_District", "Kisoro", "Uganda", 
                                   "Kitgum_District", "Kitgum", "Uganda", 
                                   "Kotido_District", "Kotido", "Uganda", 
                                   "Kumi_District", "Kumi", "Uganda", 
                                   "Kyenjojo_District", "Kyenjojo", "Uganda", 
                                   "Lake_Albert_(Africa)", "Lake Albert", "Uganda", # not officially listed as district, but in gadm
                                   "Lake_Victoria", "Lake Victoria", "Uganda", # not officially listed as district, but in gadm
                                   "Lira_District", "Lira", "Uganda", 
                                   "Luwero_District", "Luwero", "Uganda", 
                                   "Masaka_District", "Masaka", "Uganda", 
                                   "Masindi_District", "Masindi", "Uganda", 
                                   "Mayuge_District", "Mayuge", "Uganda", 
                                   "Mbale_District", "Mbale", "Uganda", 
                                   "Mbarara_District", "Mbarara", "Uganda", 
                                   "Moroto_District", "Moroto", "Uganda", 
                                   "Moyo_District", "Moyo", "Uganda", 
                                   "Mpigi_District", "Mpigi", "Uganda", 
                                   "Mubende_District", "Mubende", "Uganda", 
                                   "Mukono_District", "Mukono", "Uganda", 
                                   "Nakapiripirit_District", "Nakapiripirit", "Uganda", 
                                   "Nakasongola_District", "Nakasongola", "Uganda", 
                                   "Nebbi_District", "Nebbi", "Uganda", 
                                   "Ntungamo_District", "Ntungamo", "Uganda", 
                                   "Pader_District", "Pader", "Uganda", 
                                   "Pallisa_District", "Pallisa", "Uganda", 
                                   "Rakai_District", "Rakai", "Uganda", 
                                   "Rukungiri_District", "Rukungiri", "Uganda", 
                                   "Sembabule_District", "Sembabule", "Uganda", 
                                   "Sironko_District", "Sironko", "Uganda", 
                                   "Soroti_District", "Soroti", "Uganda", 
                                   "Tororo_District", "Tororo", "Uganda", 
                                   "Wakiso_District", "Wakiso", "Uganda", 
                                   "Yumbe_District", "Yumbe", "Uganda", 
                                   ####################
                                   ## zambia "Zambia", 
                                   ####################
                                   "Central_Province,_Zambia", "Central", "Zambia", 
                                   "Copperbelt_Province", "Copperbelt", "Zambia", 
                                   "Eastern_Province,_Zambia", "Eastern", "Zambia", 
                                   "Luapula_Province", "Luapula", "Zambia", 
                                   "Lusaka_Province", "Lusaka", "Zambia", 
                                   "Muchinga_Province", "Muchinga", "Zambia", 
                                   "North-Western_Province,_Zambia", "North-Western", "Zambia", 
                                   "Northern_Province,_Zambia", "Northern", "Zambia", 
                                   "Southern_Province,_Zambia", "Southern", "Zambia", 
                                   "Western_Province,_Zambia", "Western", "Zambia", 
                                   ####################
                                   ## zimbabwe "Zimbabwe", 
                                   ####################
                                   "Bulawayo", "Bulawayo", "Zimbabwe", 
                                   "Harare_Province", "Harare", "Zimbabwe", 
                                   "Manicaland_Province", "Manicaland", "Zimbabwe", 
                                   "Mashonaland_Central_Province", "Mashonaland Central", "Zimbabwe", 
                                   "Mashonaland_East_Province", "Mashonaland East", "Zimbabwe", 
                                   "Mashonaland_West_Province", "Mashonaland West", "Zimbabwe", 
                                   "Masvingo_Province", "Masvingo", "Zimbabwe", 
                                   "Matabeleland_North_Province", "Matabeleland North", "Zimbabwe", 
                                   "Matabeleland_South_Province", "Matabeleland South", "Zimbabwe", 
                                   "Midlands_Province", "Midlands", "Zimbabwe"
                                   ####################
                                   ## Western Sahara "Western Sahara", 
                                   ####################
                                   # "Boujdour_Province", "Boujdour", "Western Sahara",
                                   # "Es_Semara_Province", "Es Semara", "Western Sahara",
                                   # "Laâyoune_Province", "Laayoune", "Western Sahara",
                                   # "Oued_Ed-Dahab_Province", "Oued el Dahab", "Western Sahara"
                                   ),
                                 nrow = 850, ncol = 3, byrow = TRUE,
                                 dimnames = list(NULL, c("wikipedia_en", "gadmname", "country"))))


## sanity check: count NA cells in the hand-written dictionary
sum(is.na(africa_dict))

## add pages in other languages
## For every English page the interlanguage links are fetched once and the
## linked page title of each target language is stored, URL-decoded, in a
## `wikipedia_<lang>` column. A language without a linked page stays NA.
## spanish (es), french (fr), chinese (zh), hindi (hi), arabic (ar),
## bengali (bn), urdu (ur), portuguese (pt), german (de), russian (ru)
target_langs <- c("es", "fr", "zh", "hi", "ar", "bn", "ur", "pt", "de", "ru")

## create the columns up front so the loop only has to fill single cells
for (lang in target_langs) {
  africa_dict[[paste0("wikipedia_", lang)]] <- NA_character_
}

for (i in seq_len(nrow(africa_dict))) {
  print(africa_dict$wikipedia_en[i])
  ## one request per page, reused for all languages
  links <- wp_linked_pages(africa_dict$wikipedia_en[i], "en")
  for (lang in target_langs) {
    ## match() picks exactly one entry (the first hit) or NA, so a single
    ## value is written per cell regardless of how many links the page has
    page <- as.character(links$page)[match(lang, links$lang)]
    if (length(page) == 1 && !is.na(page)) {
      africa_dict[[paste0("wikipedia_", lang)]][i] <- url_decode_utf(page)
    }
  }
}

## NA counts after the lookup: overall and per column
sum(is.na(africa_dict))
vapply(africa_dict, function(x) sum(is.na(x)), integer(1))
## check issues here
str(africa_dict)

## look into whether merging like this works:
## taken from click click boom
## Country names are written Wikipedia-style (underscores) and converted to
## lower case with spaces. Swaziland is left out; Eswatini is listed instead.
countries <- tolower(gsub(
  "_", " ",
  c(
    "Algeria", "Angola", "Benin", "Botswana", "Burkina_Faso", "Burundi",
    "Cameroon", "Cape_Verde", "Central_African_Republic", "Chad", "Comoros",
    "Democratic_Republic_of_the_Congo", "Republic_of_the_Congo",
    "Ivory_Coast", "Djibouti", "Egypt", "Equatorial_Guinea", "Eritrea",
    "Eswatini", "Ethiopia", "Gabon", "The_Gambia", "Ghana", "Guinea",
    "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
    "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco", "Mozambique",
    "Namibia", "Niger", "Nigeria", "Rwanda", "São_Tomé_and_Príncipe",
    "Senegal", "Seychelles", "Sierra_Leone", "Somalia", "South_Africa",
    "South_Sudan", "Sudan",
    # "Swaziland",
    "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
  ),
  fixed = TRUE
))

## Gleditsch & Ward numeric codes ("gwn") for every entry of `countries`.
## microstates are not matched automatically, so their codes are supplied by
## hand through `custom_match`; sources:
##   http://ksgleditsch.com/data/microstatessystem.dat
##   http://ksgleditsch.com/data/iisystem.dat
## codes of further microstates (not needed here, kept for reference):
##   Andorra 232, Antigua and Barbuda 58, Dominica 54, Grenada 55,
##   Kiribati 970, Liechtenstein 223, Marshall Islands 983,
##   Federated States of Micronesia 987, Monaco 221, Nauru 971, Palau 986,
##   Saint Kitts and Nevis 60, Saint Lucia 56,
##   Saint Vincent and the Grenadines 57, Samoa 990, San Marino 331,
##   Tonga 972, Tuvalu 973, Vanuatu 935, Yemen 678
gwno.codes <- countrycode::countrycode(
  sourcevar = countries,
  origin = "country.name",
  destination = "gwn",
  custom_match = c(
    "são tomé and príncipe" = "403",
    "seychelles" = "591"
  )
)

## custom dictionary for adding country names later (ISO codes do not run
## smoothly via countryname()..)
## Written as name = code pairs; the result is a two-column data frame with
## columns `country_name` and `ISO`, one row per country in the order below.
country_dict <- local({
  iso_by_country <- c(
    "Algeria" = "DZA",
    "Angola" = "AGO",
    "Benin" = "BEN",
    "Botswana" = "BWA",
    "Burkina_Faso" = "BFA",
    "Burundi" = "BDI",
    "Cameroon" = "CMR",
    "Cape_Verde" = "CPV",
    "Central_African_Republic" = "CAF",
    "Chad" = "TCD",
    "Comoros" = "COM",
    "Democratic_Republic_of_the_Congo" = "COD",
    "Republic_of_the_Congo" = "COG",
    "Ivory_Coast" = "CIV",
    "Djibouti" = "DJI",
    "Egypt" = "EGY",
    "Equatorial_Guinea" = "GNQ",
    "Eritrea" = "ERI",
    "Eswatini" = "SWZ",
    "Ethiopia" = "ETH",
    "Gabon" = "GAB",
    "The_Gambia" = "GMB",
    "Ghana" = "GHA",
    "Guinea" = "GIN",
    "Guinea-Bissau" = "GNB",
    "Kenya" = "KEN",
    "Lesotho" = "LSO",
    "Liberia" = "LBR",
    "Libya" = "LBY",
    "Madagascar" = "MDG",
    "Malawi" = "MWI",
    "Mali" = "MLI",
    "Mauritania" = "MRT",
    "Mauritius" = "MUS",
    "Morocco" = "MAR",
    "Mozambique" = "MOZ",
    "Namibia" = "NAM",
    "Niger" = "NER",
    "Nigeria" = "NGA",
    "Rwanda" = "RWA",
    "São_Tomé_and_Príncipe" = "STP",
    "Senegal" = "SEN",
    "Seychelles" = "SYC",
    "Sierra_Leone" = "SLE",
    "Somalia" = "SOM",
    "South_Africa" = "ZAF",
    "South_Sudan" = "SSD",
    "Sudan" = "SDN",
    "Tanzania" = "TZA",
    "Togo" = "TGO",
    "Tunisia" = "TUN",
    "Uganda" = "UGA",
    "Zambia" = "ZMB",
    "Zimbabwe" = "ZWE"
  )
  data.frame(
    country_name = names(iso_by_country),
    ISO = unname(iso_by_country)
  )
})



## read gadm data
## The Africa subset is loaded from a cached rds file; the commented lines
## show how that file was produced from data/gadm/gadm_410.gpkg.
# gadm <- sf::st_read("data/gadm/gadm_410.gpkg")
# ls(gadm)
# unique(gadm$CONTINENT)
# gadmafrica <- gadm %>%
#   filter(CONTINENT == "Africa") #%>%
# saveRDS(gadmafrica, "data/gadm/gadmafrica.rds")
gadmafrica <- readRDS(file = "data/gadm/gadmafrica.rds")
unique(gadmafrica[["NAME_0"]])
## ad-hoc look at the first-level units of single countries:
# egypt <- gadmafrica %>%
#   filter(NAME_0 == "Egypt") %>%
#   as.data.frame() %>%
#   select(GID_1, NAME_1) %>%
#   unique()
# morocco <- gadmafrica %>%
#   filter(NAME_0 == "Morocco") %>%
#   as.data.frame() %>%
#   select(GID_1, NAME_1) %>%
#   unique()

## check difference between country dict and gadmafrica in terms of countries
## ISO codes in the dictionary that GADM does not have
setdiff(country_dict[["ISO"]], gadmafrica[["GID_0"]])
## ISO codes in GADM that the dictionary does not have, shown as codes and
## then as country names
isodiff <- setdiff(gadmafrica[["GID_0"]], country_dict[["ISO"]])
isodiff
countrycode::countrycode(
  isodiff,
  origin = "iso3c",
  destination = "country.name"
)

## GADM field reference
## NAME = official name in latin script
## VARNAME = variant name; alternate names in usage for the place, separated by pipes |
## NL_NAME = non-latin name, official name in a non-latin script (eg arabic, chinese, russian, korean)
## link: https://gadm.org/metadata.html
##
## gadmprep: one row per (country, first-level unit) for the countries listed
## in country_dict, with columns
##   isocode1     = GID_0  (country code)
##   isoname1     = NAME_0 (country name)
##   isocode2full = GID_1  (full id of the first-level unit)
##   isoname2     = NAME_1 (name of the first-level unit)
##   isocode2     = numeric part of GID_1 (to merge with xsub later)
## Second-level (GID_2 / NAME_2 / ...) and alternate-name columns are not
## carried along because nothing downstream of this step reads them.
gadmprep <- gadmafrica %>%
  ## convert to a plain data.frame BEFORE selecting: if gadmafrica is still an
  ## sf object and sf is attached, select() would otherwise keep the geometry
  ## column and unique() could no longer collapse rows to one per unit
  as.data.frame() %>%
  select(
    isocode1 = GID_0,
    isoname1 = NAME_0,
    isocode2full = GID_1,
    isoname2 = NAME_1
  ) %>%
  filter(isocode1 %in% country_dict$ISO) %>%
  ## isocode2 = the run of digits directly in front of the "_" in GID_1.
  ## The dot separator is deliberately not required, so ids written without it
  ## (e.g. "GHA1_2") are parsed as well instead of turning into NA.
  mutate(
    isocode2 = as.numeric(sub("^.*[^0-9](\\d+)_.*$", "\\1", isocode2full))
  ) %>%
  unique()
head(gadmprep)
## total number of NA cells; rows whose GID_1 could not be parsed show up here
sum(is.na(gadmprep))

## Ghana has no dot between isocode country and first level administrative
## region iso number (ids look like "GHA1_2" rather than "GHA.1_2"), so a
## dot-based extraction of isocode2 leaves these rows NA.
## keep the incomplete rows for inspection
missings_tab <- gadmprep[rowSums(is.na(gadmprep)) > 0, ]

## Fill isocode2 for every dot-less Ghana id in one vectorised step: the region
## number is the run of digits between "GHA" and the underscore. grepl() returns
## FALSE for NA ids, so rows without an id are left untouched.
gha_nodot <- grepl("^GHA\\d+_\\d+$", gadmprep$isocode2full)
gadmprep$isocode2[gha_nodot] <- as.numeric(
  sub("^GHA(\\d+)_\\d+$", "\\1", gadmprep$isocode2full[gha_nodot])
)

## re-check: NA count after the fix, the Ghana rows, and the column type
sum(is.na(gadmprep))
test <- gadmprep %>%
  filter(isocode1 == "GHA")
str(gadmprep$isocode2)

## NA counts in the dictionary before the merge: total, then per column
sum(is.na(africa_dict))
vapply(africa_dict, function(col) sum(is.na(col)), integer(1))

## attach the GADM codes to each dictionary entry, matching on country name and
## first-level unit name; entries without a match get NA in the added columns
africa_dict <- africa_dict %>%
  left_join(
    gadmprep,
    by = c("country" = "isoname1", "gadmname" = "isoname2")
  )

## NA counts after the merge: total, then per column
sum(is.na(africa_dict))
vapply(africa_dict, function(col) sum(is.na(col)), integer(1))

saveRDS(africa_dict, "rds/lists/africa_dict.rds")


## clear environment
## NOTE(review): this removes every object created so far in the session;
## the step below therefore starts again from the file saved on disk.
rm(list = ls())

## load libraries
library(wikipediatrend)
library(tidyverse)

## reload the merged dictionary and report its NA counts (total, per column)
africa_dict <- readRDS("rds/lists/africa_dict.rds")
sum(is.na(africa_dict))
vapply(africa_dict, function(col) sum(is.na(col)), integer(1))

## Missing entries in the per-language wikipedia columns are replaced by the
## string "0".
africa_dict <- africa_dict %>%
  replace_na(as.list(c(
    wikipedia_es = "0",
    wikipedia_fr = "0",
    wikipedia_zh = "0",
    wikipedia_hi = "0",
    wikipedia_ar = "0",
    wikipedia_bn = "0",
    wikipedia_ur = "0",
    wikipedia_pt = "0",
    wikipedia_de = "0",
    wikipedia_ru = "0"
  ))) %>%
  ## africa dict has one more region than gadmprep
  ## remove that one row
  ## NOTE(review): na.omit() drops every row that still has an NA in ANY
  ## column, not only the unmatched region - confirm no other rows are lost.
  na.omit()

## verify that no NA is left, in total and per column
sum(is.na(africa_dict))
vapply(africa_dict, function(col) sum(is.na(col)), integer(1))


saveRDS(africa_dict, "rds/lists/africa_dict_filled.rds")
